"""
法院

每次出现验证码：更换代理IP
出现问题： 不能规避验证码
"""


import requests
from lxml import etree

def get_ip():
	"""Fetch one private proxy address from the kdlapi.com ``getdps`` API.

	Returns:
		The response body with surrounding whitespace removed
		(presumably a single ``ip:port`` string -- verify against the API).

	Raises:
		requests.RequestException: if the request fails, times out, or the
			API answers with an HTTP error status.
	"""
	# Private-proxy extraction API endpoint.
	api = "https://dps.kdlapi.com/api/getdps"

	# Request parameters.
	# NOTE(review): credentials are hard-coded here; consider loading them
	# from the environment or a config file instead of committing them.
	params = {
		"secret_id": "onf8uhahowkdthvnrftc",
		"signature": "ujeceyy9bsevdogk5216dbut6rnqbpjr",
		"num": 1,  # number of proxies to extract
	}

	# A timeout keeps a stalled API call from hanging the crawl forever.
	response = requests.get(api, params=params, timeout=10)
	# Fail loudly on an HTTP error instead of using an error page as a proxy.
	response.raise_for_status()
	# Strip stray whitespace/newlines so the value embeds cleanly in a proxy URL.
	return response.text.strip()

def get_proxies():
	"""Build a ``requests`` proxies mapping around a freshly fetched proxy.

	Returns:
		A dict with ``"http"`` and ``"https"`` keys, both pointing at the
		same authenticated ``http://user:password@<proxy>/`` URL, where
		``<proxy>`` is whatever ``get_ip()`` returned.
	"""
	address = get_ip()
	# Username/password authentication (private / dedicated proxy).
	account = "d4128411536"
	secret = "njyzbgtb"
	# Both schemes go through the same proxy endpoint, so build the URL once.
	proxy_url = f"http://{account}:{secret}@{address}/"
	return {"http": proxy_url, "https": proxy_url}


# Start with a fresh proxy; it is replaced whenever a request fails or a page
# comes back without any result rows.
proxies = get_proxies()
page = 1

# Crawl pages 1..578; the loop ends once the counter reaches 579.
while page < 579:
	print(f"当前正在爬取第{page}页")
	url = f"https://www.bjcourt.gov.cn/zxxx/indexOld.htm?st=1&zxxxlx=100013007&bzxrlx=&bzxrxm=&zrr=&frhqtzz=&jbfyId=&ah=&dqxh=22&page={page}"
	try:
		# The timeout stops a dead proxy from blocking the crawl indefinitely.
		res = requests.get(url, proxies=proxies, timeout=15)
	except requests.RequestException as exc:
		# A broken or slow proxy should trigger a rotation, not crash the crawl.
		print(f"请求失败({exc}),更换代理IP")
		proxies = get_proxies()
		continue

	tree = etree.HTML(res.text)
	# Skip the header row of the result table.
	# NOTE(review): the None guard covers the case where etree.HTML yields no
	# tree for an empty/unparseable body -- confirm against the lxml version in use.
	items = tree.xpath('//table[@class="table_list_02"]/tr[position()>1]') if tree is not None else []
	if items:
		for item in items:
			print(item.xpath('./td/text()'))
		page += 1
	else:
		# No result rows is treated as a CAPTCHA page: retry the same page
		# through a different proxy.
		print("发现验证码,更换代理IP")
		proxies = get_proxies()